{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
      "  from ._conv import register_converters as _register_converters\n"
     ]
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.50035296 0.92651365]\n"
     ]
    }
   ],
   "source": [
    "x = np.random.sample((100,2))\n",
    "# make a dataset from a numpy array\n",
    "dataset = tf.data.Dataset.from_tensor_slices(x)\n",
    "\n",
    "iter = dataset.make_one_shot_iterator()\n",
    "el = iter.get_next()\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    print(sess.run(el))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(array([0.33327842, 0.90874317]), array([0.02171065]))\n"
     ]
    }
   ],
   "source": [
    "# using two numpy arrays\n",
    "features, labels = (np.random.sample((100,2)), np.random.sample((100,1)))\n",
    "dataset = tf.data.Dataset.from_tensor_slices((features,labels))\n",
    "\n",
    "iter = dataset.make_one_shot_iterator()\n",
    "el = iter.get_next()\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    print(sess.run(el))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.00786543 0.26009214]\n"
     ]
    }
   ],
   "source": [
    "# using a tensor\n",
    "dataset = tf.data.Dataset.from_tensor_slices(tf.random_uniform([100, 2]))\n",
    "\n",
    "iter = dataset.make_initializable_iterator()\n",
    "el = iter.get_next()\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    sess.run(iter.initializer)\n",
    "    print(sess.run(el))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.03433903 0.7280311 ]\n"
     ]
    }
   ],
   "source": [
    "# using a placeholder\n",
    "x = tf.placeholder(tf.float32, shape=[None,2])\n",
    "dataset = tf.data.Dataset.from_tensor_slices(x)\n",
    "\n",
    "data = np.random.sample((100,2))\n",
    "\n",
    "iter = dataset.make_initializable_iterator()\n",
    "el = iter.get_next()\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    sess.run(iter.initializer, feed_dict={ x: data })\n",
    "    print(sess.run(el))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1]]\n",
      "[[2]\n",
      " [3]]\n",
      "[[3]\n",
      " [4]\n",
      " [5]]\n"
     ]
    }
   ],
   "source": [
    "# from generator\n",
    "sequence = np.array([[[1]],[[2],[3]],[[3],[4],[5]]])\n",
    "\n",
    "def generator():\n",
    "    for el in sequence:\n",
    "        yield el\n",
    "\n",
    "dataset = tf.data.Dataset().batch(1).from_generator(generator,\n",
    "                                           output_types= tf.int64, \n",
    "                                           output_shapes=(tf.TensorShape([None, 1])))\n",
    "\n",
    "iter = dataset.make_initializable_iterator()\n",
    "el = iter.get_next()\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    sess.run(iter.initializer)\n",
    "    print(sess.run(el))\n",
    "    print(sess.run(el))\n",
    "    print(sess.run(el))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[array([1., 2.], dtype=float32), array([0.], dtype=float32)]\n"
     ]
    }
   ],
   "source": [
    "# initializable iterator to switch between data\n",
    "EPOCHS = 10\n",
    "\n",
    "x, y = tf.placeholder(tf.float32, shape=[None,2]), tf.placeholder(tf.float32, shape=[None,1])\n",
    "dataset = tf.data.Dataset.from_tensor_slices((x, y))\n",
    "\n",
    "train_data = (np.random.sample((100,2)), np.random.sample((100,1)))\n",
    "test_data = (np.array([[1,2]]), np.array([[0]]))\n",
    "\n",
    "iter = dataset.make_initializable_iterator()\n",
    "features, labels = iter.get_next()\n",
    "\n",
    "with tf.Session() as sess:\n",
    "#     initialise iterator with train data\n",
    "    sess.run(iter.initializer, feed_dict={ x: train_data[0], y: train_data[1]})\n",
    "    for _ in range(EPOCHS):\n",
    "        sess.run([features, labels])\n",
    "#     switch to test data\n",
    "    sess.run(iter.initializer, feed_dict={ x: test_data[0], y: test_data[1]})\n",
    "    print(sess.run([features, labels]))\n",
    "\n",
    "    \n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[array([0.94182994, 0.26802265]), array([0.81551463])]\n"
     ]
    }
   ],
   "source": [
    "# Reinitializable iterator to switch between Datasets\n",
    "EPOCHS = 10\n",
    "# making fake data using numpy\n",
    "train_data = (np.random.sample((100,2)), np.random.sample((100,1)))\n",
    "test_data = (np.random.sample((10,2)), np.random.sample((10,1)))\n",
    "# create two datasets, one for training and one for test\n",
    "train_dataset = tf.data.Dataset.from_tensor_slices(train_data)\n",
    "test_dataset = tf.data.Dataset.from_tensor_slices(test_data)\n",
    "# create a iterator of the correct shape and type\n",
    "iter = tf.data.Iterator.from_structure(train_dataset.output_types,\n",
    "                                           train_dataset.output_shapes)\n",
    "features, labels = iter.get_next()\n",
    "# create the initialisation operations\n",
    "train_init_op = iter.make_initializer(train_dataset)\n",
    "test_init_op = iter.make_initializer(test_dataset)\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    sess.run(train_init_op) # switch to train dataset\n",
    "    for _ in range(EPOCHS):\n",
    "        sess.run([features, labels])\n",
    "    sess.run(test_init_op) # switch to val dataset\n",
    "    print(sess.run([features, labels]))\n",
    "\n",
    "    \n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.8552025  0.13344285] [0.24534453]\n",
      "[0.23880187 0.2294315 ] [0.77315474]\n",
      "[0.763904 0.439595] [0.42727667]\n",
      "[0.6563372 0.1366187] [0.02278621]\n",
      "[0.71135175 0.394754  ] [0.8552778]\n",
      "[0.7329701  0.42924434] [0.43608633]\n",
      "[0.8240853 0.7750715] [0.5140434]\n",
      "[0.65556693 0.67978406] [0.8228361]\n",
      "[0.02365288 0.18461536] [0.85140544]\n",
      "[0.48037764 0.7320316 ] [0.773141]\n",
      "[0.6671238 0.8491173] [0.45188755]\n"
     ]
    }
   ],
   "source": [
    "# feedable iterator to switch between iterators\n",
    "EPOCHS = 10\n",
    "# making fake data using numpy\n",
    "train_data = (np.random.sample((100,2)), np.random.sample((100,1)))\n",
    "test_data = (np.random.sample((10,2)), np.random.sample((10,1)))\n",
    "# create placeholder\n",
    "x, y = tf.placeholder(tf.float32, shape=[None,2]), tf.placeholder(tf.float32, shape=[None,1])\n",
    "# create two datasets, one for training and one for test\n",
    "train_dataset = tf.data.Dataset.from_tensor_slices((x,y))\n",
    "test_dataset = tf.data.Dataset.from_tensor_slices((x,y))\n",
    "# create the iterators from the dataset\n",
    "train_iterator = train_dataset.make_initializable_iterator()\n",
    "test_iterator = test_dataset.make_initializable_iterator()\n",
    "# same as in the doc https://www.tensorflow.org/programmers_guide/datasets#creating_an_iterator\n",
    "handle = tf.placeholder(tf.string, shape=[])\n",
    "iter = tf.data.Iterator.from_string_handle(\n",
    "    handle, train_dataset.output_types, train_dataset.output_shapes)\n",
    "next_elements = iter.get_next()\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    train_handle = sess.run(train_iterator.string_handle())\n",
    "    test_handle = sess.run(test_iterator.string_handle())\n",
    "    \n",
    "    # initialise iterators. In our case we could have used the 'one-shot' iterator instead,\n",
    "    # and directly feed the data insted the Dataset.from_tensor_slices function, but this\n",
    "    # approach is more general\n",
    "    sess.run(train_iterator.initializer, feed_dict={ x: train_data[0], y: train_data[1]})\n",
    "    sess.run(test_iterator.initializer, feed_dict={ x: test_data[0], y: test_data[1]})\n",
    "    \n",
    "    for _ in range(EPOCHS):\n",
    "        x,y = sess.run(next_elements, feed_dict = {handle: train_handle})\n",
    "        print(x, y)\n",
    "        \n",
    "    x,y = sess.run(next_elements, feed_dict = {handle: test_handle})\n",
    "    print(x,y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0.70861276 0.91522017]\n",
      " [0.993154   0.74425373]\n",
      " [0.42730845 0.03037355]\n",
      " [0.54031161 0.57429001]]\n"
     ]
    }
   ],
   "source": [
    "# BATCHING\n",
    "BATCH_SIZE = 4\n",
    "x = np.random.sample((100,2))\n",
    "# make a dataset from a numpy array\n",
    "dataset = tf.data.Dataset.from_tensor_slices(x).batch(BATCH_SIZE)\n",
    "\n",
    "iter = dataset.make_one_shot_iterator()\n",
    "el = iter.get_next()\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    print(sess.run(el))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1]\n",
      "[2]\n",
      "[3]\n",
      "[4]\n",
      "[1]\n",
      "[2]\n",
      "[3]\n",
      "[4]\n"
     ]
    }
   ],
   "source": [
    "# REPEAT\n",
    "BATCH_SIZE = 4\n",
    "x = np.array([[1],[2],[3],[4]])\n",
    "# make a dataset from a numpy array\n",
    "dataset = tf.data.Dataset.from_tensor_slices(x)\n",
    "dataset = dataset.repeat()\n",
    "\n",
    "iter = dataset.make_one_shot_iterator()\n",
    "el = iter.get_next()\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    for _ in range(8):\n",
    "        print(sess.run(el))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# MAP\n",
    "x = np.array([[1],[2],[3],[4]])\n",
    "# make a dataset from a numpy array\n",
    "dataset = tf.data.Dataset.from_tensor_slices(x)\n",
    "dataset = dataset.map(lambda x: x*2)\n",
    "\n",
    "iter = dataset.make_one_shot_iterator()\n",
    "el = iter.get_next()\n",
    "\n",
    "with tf.Session() as sess:\n",
    "#     this will run forever\n",
    "        for _ in range(len(x)):\n",
    "            print(sess.run(el))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[3]\n",
      " [1]\n",
      " [2]\n",
      " [4]]\n"
     ]
    }
   ],
   "source": [
    "# SHUFFLE\n",
    "BATCH_SIZE = 4\n",
    "x = np.array([[1],[2],[3],[4]])\n",
    "# make a dataset from a numpy array\n",
    "dataset = tf.data.Dataset.from_tensor_slices(x)\n",
    "dataset = dataset.shuffle(buffer_size=100)\n",
    "dataset = dataset.batch(BATCH_SIZE)\n",
    "\n",
    "iter = dataset.make_one_shot_iterator()\n",
    "el = iter.get_next()\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    print(sess.run(el))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Iter: 0, Loss: 0.1913\n",
      "Iter: 1, Loss: 0.1814\n",
      "Iter: 2, Loss: 0.1720\n",
      "Iter: 3, Loss: 0.1631\n",
      "Iter: 4, Loss: 0.1547\n",
      "Iter: 5, Loss: 0.1469\n",
      "Iter: 6, Loss: 0.1397\n",
      "Iter: 7, Loss: 0.1329\n",
      "Iter: 8, Loss: 0.1267\n",
      "Iter: 9, Loss: 0.1210\n"
     ]
    }
   ],
   "source": [
    "# how to pass the value to a model\n",
    "EPOCHS = 10\n",
    "BATCH_SIZE = 16\n",
    "# using two numpy arrays\n",
    "features, labels = (np.array([np.random.sample((100,2))]), \n",
    "                    np.array([np.random.sample((100,1))]))\n",
    "\n",
    "dataset = tf.data.Dataset.from_tensor_slices((features,labels)).repeat().batch(BATCH_SIZE)\n",
    "\n",
    "iter = dataset.make_one_shot_iterator()\n",
    "x, y = iter.get_next()\n",
    "\n",
    "# make a simple model\n",
    "net = tf.layers.dense(x, 8, activation=tf.tanh) # pass the first value from iter.get_next() as input\n",
    "net = tf.layers.dense(net, 8, activation=tf.tanh)\n",
    "prediction = tf.layers.dense(net, 1, activation=tf.tanh)\n",
    "\n",
    "loss = tf.losses.mean_squared_error(prediction, y) # pass the second value from iter.get_net() as label\n",
    "train_op = tf.train.AdamOptimizer().minimize(loss)\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    sess.run(tf.global_variables_initializer())\n",
    "    for i in range(EPOCHS):\n",
    "        _, loss_value = sess.run([train_op, loss])\n",
    "        print(\"Iter: {}, Loss: {:.4f}\".format(i, loss_value))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3\n",
      "Training...\n",
      "Iter: 0, Loss: 1.4389\n",
      "Iter: 1, Loss: 1.4704\n",
      "Iter: 2, Loss: 1.4081\n",
      "Iter: 3, Loss: 1.2877\n",
      "Iter: 4, Loss: 1.1842\n",
      "Iter: 5, Loss: 1.1944\n",
      "Iter: 6, Loss: 1.1166\n",
      "Iter: 7, Loss: 0.9924\n",
      "Iter: 8, Loss: 0.8997\n",
      "Iter: 9, Loss: 0.8817\n",
      "Test Loss: 0.836423\n"
     ]
    }
   ],
   "source": [
    "# Wrapping all together -> Switch between train and test set using Initializable iterator\n",
    "EPOCHS = 10\n",
    "# create a placeholder to dynamically switch between batch sizes\n",
    "batch_size = tf.placeholder(tf.int64)\n",
    "BATCH_SIZE = 32\n",
    "\n",
    "x, y = tf.placeholder(tf.float32, shape=[None,2]), tf.placeholder(tf.float32, shape=[None,1])\n",
    "dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size).repeat()\n",
    "\n",
    "# using two numpy arrays\n",
    "train_data = (np.random.sample((100,2)), np.random.sample((100,1)))\n",
    "test_data = (np.random.sample((20,2)), np.random.sample((20,1)))\n",
    "\n",
    "iter = dataset.make_initializable_iterator()\n",
    "features, labels = iter.get_next()\n",
    "# make a simple model\n",
    "net = tf.layers.dense(features, 8, activation=tf.tanh) # pass the first value from iter.get_next() as input\n",
    "net = tf.layers.dense(net, 8, activation=tf.tanh)\n",
    "prediction = tf.layers.dense(net, 1, activation=tf.tanh)\n",
    "\n",
    "loss = tf.losses.mean_squared_error(prediction, labels) # pass the second value from iter.get_net() as label\n",
    "train_op = tf.train.AdamOptimizer().minimize(loss)\n",
    "\n",
    "n_batches = train_data[0].shape[0] // BATCH_SIZE\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    sess.run(tf.global_variables_initializer())\n",
    "    # initialise iterator with train data\n",
    "    sess.run(iter.initializer, feed_dict={ x: train_data[0], y: train_data[1], batch_size: BATCH_SIZE})\n",
    "    print('Training...')\n",
    "    for i in range(EPOCHS):\n",
    "        tot_loss = 0\n",
    "        for _ in range(n_batches):\n",
    "            _, loss_value = sess.run([train_op, loss])\n",
    "            tot_loss += loss_value\n",
    "        print(\"Iter: {}, Loss: {:.4f}\".format(i, tot_loss / n_batches))\n",
    "    # initialise iterator with test data\n",
    "    sess.run(iter.initializer, feed_dict={ x: test_data[0], y: test_data[1], batch_size: test_data[0].shape[0]})\n",
    "    print('Test Loss: {:4f}'.format(sess.run(loss)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training...\n",
      "Iter: 0, Loss: 0.1602\n",
      "Iter: 1, Loss: 0.1191\n",
      "Iter: 2, Loss: 0.0964\n",
      "Iter: 3, Loss: 0.0907\n",
      "Iter: 4, Loss: 0.0738\n",
      "Iter: 5, Loss: 0.0819\n",
      "Iter: 6, Loss: 0.0728\n",
      "Iter: 7, Loss: 0.0881\n",
      "Iter: 8, Loss: 0.0765\n",
      "Iter: 9, Loss: 0.0729\n",
      "Test Loss: 0.091081\n"
     ]
    }
   ],
   "source": [
    "# Wrapping all together -> Switch between train and test set using Reinitializable iterator\n",
    "EPOCHS = 10\n",
    "# create a placeholder to dynamically switch between batch sizes\n",
    "batch_size = tf.placeholder(tf.int64)\n",
    "\n",
    "x, y = tf.placeholder(tf.float32, shape=[None,2]), tf.placeholder(tf.float32, shape=[None,1])\n",
    "train_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(batch_size).repeat()\n",
    "test_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(batch_size) # always batch even if you want to one shot it\n",
    "# using two numpy arrays\n",
    "train_data = (np.random.sample((100,2)), np.random.sample((100,1)))\n",
    "test_data = (np.random.sample((20,2)), np.random.sample((20,1)))\n",
    "\n",
    "# create a iterator of the correct shape and type\n",
    "iter = tf.data.Iterator.from_structure(train_dataset.output_types,\n",
    "                                           train_dataset.output_shapes)\n",
    "features, labels = iter.get_next()\n",
    "# create the initialisation operations\n",
    "train_init_op = iter.make_initializer(train_dataset)\n",
    "test_init_op = iter.make_initializer(test_dataset)\n",
    "\n",
    "# make a simple model\n",
    "net = tf.layers.dense(features, 8, activation=tf.tanh) # pass the first value from iter.get_next() as input\n",
    "net = tf.layers.dense(net, 8, activation=tf.tanh)\n",
    "prediction = tf.layers.dense(net, 1, activation=tf.tanh)\n",
    "\n",
    "loss = tf.losses.mean_squared_error(prediction, labels) # pass the second value from iter.get_net() as label\n",
    "train_op = tf.train.AdamOptimizer().minimize(loss)\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    sess.run(tf.global_variables_initializer())\n",
    "    # initialise iterator with train data\n",
    "    sess.run(train_init_op, feed_dict = {x : train_data[0], y: train_data[1], batch_size: 16})\n",
    "    print('Training...')\n",
    "    for i in range(EPOCHS):\n",
    "        tot_loss = 0\n",
    "        for _ in range(n_batches):\n",
    "            _, loss_value = sess.run([train_op, loss])\n",
    "            tot_loss += loss_value\n",
    "        print(\"Iter: {}, Loss: {:.4f}\".format(i, tot_loss / n_batches))\n",
    "    # initialise iterator with test data\n",
    "    sess.run(test_init_op, feed_dict = {x : test_data[0], y: test_data[1], batch_size:len(test_data[0])})\n",
    "    print('Test Loss: {:4f}'.format(sess.run(loss)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'sentiment': <tf.Tensor 'IteratorGetNext_15:0' shape=(?,) dtype=int32>, 'text': <tf.Tensor 'IteratorGetNext_15:1' shape=(?,) dtype=string>}\n",
      "[array([b\"@MENTION, i agree! i'm trying to finish my thesis..and so far it's not going anywhere\",\n",
      "       b'@MENTION erm, sacre coeur even... james -1',\n",
      "       b\"@MENTION now am depressed and br'3ii is sleepin since i came home\",\n",
      "       b'just finishing what turned out to be a nice day',\n",
      "       b\"it's over. it was great. dollhouse\",\n",
      "       b'just got bck from cross-country practice wooo 3 miles!',\n",
      "       b'@MENTION you mean a man who cheats on his wife habitually and is a complete hypocrite... they have plenty of those already.',\n",
      "       b'i feel bad for che ming wang though, he cant seem to get it together... and has an era of 30+',\n",
      "       b'@MENTION @MENTION ok.. so i tweeted about the rains and it is not raining anymore. atleast i wont have to water the plants tomorrow.',\n",
      "       b'is gutted about katie & peter love them!',\n",
      "       b\"finally home... long day... had a great time with vet's from every era!\",\n",
      "       b\"@MENTION yucky!!! for whatevv reason i can't eat 5 guys anymore. just makes me gag!!!\",\n",
      "       b'@MENTION ever been to the antiques roadshow? my favorite show',\n",
      "       b'lovin virginia! hopefully ill pick up an accent 4 a lil while',\n",
      "       b\"@MENTION yea me too i just drink a cup of tea, but don't worry i got some snacks with me\",\n",
      "       b'got what could quite possibly be the worst paper cut ever today. corners of file folders = paper daggers. ouchie.',\n",
      "       b'sneaking in some computer time. wish you all a good day!',\n",
      "       b\"@MENTION i'm pretty bored with it too\",\n",
      "       b'too early to call a landslide victory for m14? lebanonelections',\n",
      "       b\"school has ruined me so much that i don't even know how to sleep in anymore\",\n",
      "       b\"is at work! it's been a long weekend\",\n",
      "       b'feeling a bit sick also very bored!',\n",
      "       b'misses mr. hollinger. misses callin him hubby more than anything',\n",
      "       b'616 words i loooveeee jackson rathbone <\\xc3\\xb4\\xc3\\xb8\\xcf\\x89',\n",
      "       b'sucky day.. first i havta take the bus 2 work then it breaks down nd work suckd of course',\n",
      "       b'back from skaterhockey. crazy old bears 8 @MENTION reloaded 2. 6 goals against us in the last 20 mins',\n",
      "       b'this is sad! john and kate are officially filing for divorce...as bad as the times have been this still shocks me.',\n",
      "       b'@MENTION i wish i could',\n",
      "       b'@MENTION and what do u see!?!? *raises an eyebrow in amusement*',\n",
      "       b'@MENTION helloooo promm dress..ahh i need to get onee :| whens yours ? debenhams av a salee x',\n",
      "       b'@MENTION heyy! thx 4 making \"back around+lyrics\" gosshh! its totally amazing ! amazing work u got there!',\n",
      "       b'@MENTION aside from the use of \"sex\" as a term, i like that latest post'],\n",
      "      dtype=object), array([0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0,\n",
      "       0, 1, 0, 0, 0, 0, 1, 1, 1, 1], dtype=int32)]\n"
     ]
    }
   ],
   "source": [
    "# load a csv\n",
    "CSV_PATH = './tweets.csv'\n",
    "dataset = tf.contrib.data.make_csv_dataset(CSV_PATH, batch_size=32)\n",
    "iter = dataset.make_one_shot_iterator()\n",
    "next = iter.get_next()\n",
    "print(next) # next is a dict with key=columns names and value=column data\n",
    "inputs, labels = next['text'], next['sentiment']\n",
    "\n",
    "with  tf.Session() as sess:\n",
    "    print(sess.run([inputs,labels]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "log_time = {}\n",
    "# copied form https://medium.com/pythonhive/python-decorator-to-measure-the-execution-time-of-methods-fa04cb6bb36d\n",
    "def how_much(method):\n",
    "    def timed(*args, **kw):\n",
    "        ts = time.time()\n",
    "        result = method(*args, **kw)\n",
    "        te = time.time()\n",
    "        \n",
    "        if 'log_time' in kw:\n",
    "            name = kw.get('log_name', method.__name__)\n",
    "            kw['log_time'][name] = (te - ts)\n",
    "            \n",
    "        return result\n",
    "    return timed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "((5000, 32, 32), (5000, 20)) ((1000, 32, 32), (1000, 20))\n"
     ]
    }
   ],
   "source": [
    "# benchmark\n",
    "import time\n",
    "DATA_SIZE = 5000\n",
    "DATA_SHAPE = ((32,32),(20,))\n",
    "BATCH_SIZE = 64 \n",
    "N_BATCHES = DATA_SIZE // BATCH_SIZE\n",
    "EPOCHS = 10\n",
    "\n",
    "test_size = (DATA_SIZE//100)*20 \n",
    "\n",
    "train_shape = ((DATA_SIZE, *DATA_SHAPE[0]),(DATA_SIZE, *DATA_SHAPE[1]))\n",
    "test_shape = ((test_size, *DATA_SHAPE[0]),(test_size, *DATA_SHAPE[1]))\n",
    "print(train_shape, test_shape)\n",
    "train_data = (np.random.sample(train_shape[0]), np.random.sample(train_shape[1]))\n",
    "test_data = (np.random.sample(test_shape[0]), np.random.sample(test_shape[1])) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[None, 32, 32] [None, 20]\n",
      "one_shot\n",
      "0\n",
      "1\n",
      "2\n",
      "3\n",
      "4\n",
      "5\n",
      "6\n",
      "7\n",
      "8\n",
      "9\n",
      "initialisable\n",
      "0\n",
      "1\n",
      "2\n",
      "3\n",
      "4\n",
      "5\n",
      "6\n",
      "7\n",
      "8\n",
      "9\n",
      "reinitializable\n",
      "0\n",
      "1\n",
      "2\n",
      "3\n",
      "4\n",
      "5\n",
      "6\n",
      "7\n",
      "8\n",
      "9\n",
      "feedable\n",
      "0\n",
      "1\n",
      "2\n",
      "3\n",
      "4\n",
      "5\n",
      "6\n",
      "7\n",
      "8\n",
      "9\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[(1.5659220218658447, 'reinitializable'),\n",
       " (1.581655740737915, 'initialisable'),\n",
       " (1.7346899509429932, 'feedable'),\n",
       " (2.3557801246643066, 'one_shot')]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# used to keep track of the methodds\n",
    "log_time = {}\n",
    "\n",
    "tf.reset_default_graph()\n",
    "sess = tf.InteractiveSession()\n",
    "\n",
    "input_shape = [None, *DATA_SHAPE[0]] # [None, 64, 64, 3]\n",
    "output_shape = [None,*DATA_SHAPE[1]] # [None, 20]\n",
    "print(input_shape, output_shape)\n",
    "\n",
    "x, y = tf.placeholder(tf.float32, shape=input_shape), tf.placeholder(tf.float32, shape=output_shape)\n",
    "\n",
    "@how_much\n",
    "def one_shot(**kwargs):\n",
    "    print('one_shot')\n",
    "    train_dataset = tf.data.Dataset.from_tensor_slices(train_data).batch(BATCH_SIZE).repeat()\n",
    "    train_el = train_dataset.make_one_shot_iterator().get_next()\n",
    "    \n",
    "    test_dataset = tf.data.Dataset.from_tensor_slices(test_data).batch(BATCH_SIZE).repeat()\n",
    "    test_el = test_dataset.make_one_shot_iterator().get_next()\n",
    "    for i in range(EPOCHS):\n",
    "        print(i)\n",
    "        for _ in range(N_BATCHES):\n",
    "            sess.run(train_el)\n",
    "        for _ in range(N_BATCHES):\n",
    "            sess.run(test_el)\n",
    "            \n",
    "@how_much\n",
    "def initialisable(**kwargs):\n",
    "    print('initialisable')\n",
    "    dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(BATCH_SIZE).repeat()\n",
    "\n",
    "    iter = dataset.make_initializable_iterator()\n",
    "    elements = iter.get_next()\n",
    "    \n",
    "    for i in range(EPOCHS):\n",
    "        print(i)\n",
    "        sess.run(iter.initializer, feed_dict={ x: train_data[0], y: train_data[1]})\n",
    "        for _ in range(N_BATCHES):\n",
    "            sess.run(elements)\n",
    "        sess.run(iter.initializer, feed_dict={ x: test_data[0], y: test_data[1]})\n",
    "        for _ in range(N_BATCHES):\n",
    "            sess.run(elements)\n",
    "@how_much            \n",
    "def reinitializable(**kwargs):\n",
    "    print('reinitializable')\n",
    "    # create two datasets, one for training and one for test\n",
    "    train_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE).repeat()\n",
    "    test_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE).repeat()\n",
    "    # create a iterator of the correct shape and type\n",
    "    iter = tf.data.Iterator.from_structure(train_dataset.output_types,\n",
    "                                               train_dataset.output_shapes)\n",
    "    elements = iter.get_next()\n",
    "    # create the initialisation operations\n",
    "    train_init_op = iter.make_initializer(train_dataset)\n",
    "    test_init_op = iter.make_initializer(test_dataset)\n",
    "    \n",
    "    for i in range(EPOCHS):\n",
    "        print(i)\n",
    "        sess.run(train_init_op, feed_dict={ x: train_data[0], y: train_data[1]})\n",
    "        for _ in range(N_BATCHES):\n",
    "            sess.run(elements)\n",
    "        sess.run(test_init_op, feed_dict={ x: test_data[0], y: test_data[1]})\n",
    "        for _ in range(N_BATCHES):\n",
    "            sess.run(elements)\n",
    "@how_much            \n",
    "def feedable(**kwargs):\n",
    "    print('feedable')\n",
    "    # create two datasets, one for training and one for test\n",
    "    train_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE).repeat()\n",
    "    test_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE).repeat()\n",
    "    # create the iterators from the dataset\n",
    "    train_iterator = train_dataset.make_initializable_iterator()\n",
    "    test_iterator = test_dataset.make_initializable_iterator()\n",
    "\n",
    "    handle = tf.placeholder(tf.string, shape=[])\n",
    "    iter = tf.data.Iterator.from_string_handle(\n",
    "        handle, train_dataset.output_types, train_dataset.output_shapes)\n",
    "    elements = iter.get_next()\n",
    "\n",
    "    train_handle = sess.run(train_iterator.string_handle())\n",
    "    test_handle = sess.run(test_iterator.string_handle())\n",
    "\n",
    "    sess.run(train_iterator.initializer, feed_dict={ x: train_data[0], y: train_data[1]})\n",
    "    sess.run(test_iterator.initializer, feed_dict={ x: test_data[0], y: test_data[1]})\n",
    "\n",
    "    for i in range(EPOCHS):\n",
    "        print(i)\n",
    "        for _ in range(N_BATCHES):\n",
    "            sess.run(elements, feed_dict={handle: train_handle})\n",
    "        for _ in range(N_BATCHES):\n",
    "            sess.run(elements, feed_dict={handle: test_handle})\n",
    "            \n",
    "one_shot(log_time=log_time)\n",
    "initialisable(log_time=log_time)\n",
    "reinitializable(log_time=log_time)\n",
    "feedable(log_time=log_time)\n",
    "\n",
    "sorted((value,key) for (key,value) in log_time.items())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
